Web: https://www.meetup.com/Tel-Aviv-Deep-Learning-Bootcamp/events/241762893/
Notebooks: On GitHub
Shlomo Kashani
In [ ]:
# !pip install pycuda
%reset -f
import pycuda
import pycuda.driver as cuda
import numpy as np
from pycuda.compiler import SourceModule

cuda.init()
print("%d device(s) found." % cuda.Device.count())
for ordinal in range(cuda.Device.count()):
    dev = cuda.Device(ordinal)
    print("Device #%d: %s" % (ordinal, dev.name()))
print(cuda)
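Beyond the device name, PyCUDA exposes further per-device metadata. A minimal sketch (assuming the imports above have already run) that prints each device's compute capability and total memory:
In [ ]:
# Query extra attributes for every detected device (run after cuda.init()).
for ordinal in range(cuda.Device.count()):
    dev = cuda.Device(ordinal)
    major, minor = dev.compute_capability()
    print("Device #%d: compute capability %d.%d, %.1f GiB total memory"
          % (ordinal, major, minor, dev.total_memory() / 2**30))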
In [5]:
# gpustat prints per-GPU utilization and memory; watch refreshes it every second.
# Note: this cell blocks until the kernel is interrupted.
! watch --color -n1.0 gpustat
In [15]:
import pycuda.autoinit

# a = np.random.uniform(low=1, high=20, size=(10,))
a = np.arange(-100000, 100000, 1)
a = a.astype(np.float32)
ARR_SIZE = np.int32(a.shape[-1])
print(ARR_SIZE)

# Allocate device buffers for input and output, then copy the input over.
a_gpu = cuda.mem_alloc(a.nbytes)
xout_gpu = cuda.mem_alloc_like(a)
cuda.memcpy_htod(a_gpu, a)

mod = SourceModule("""
__global__ void sigmoid(float* a, float* b, int size)
{
    int index = blockDim.x * blockIdx.x + threadIdx.x;
    if (index < size)
        b[index] = 1.0f / (1.0f + expf(-1.0f * a[index]));
}
""")

func = mod.get_function("sigmoid")

def sigmoidGPU():
    # 1024 threads per block; enough blocks to cover the whole array.
    n_blocks = (int(ARR_SIZE) + 1023) // 1024
    func(a_gpu, xout_gpu, ARR_SIZE, block=(1024, 1, 1), grid=(n_blocks, 1, 1))
    a_sigmoid = np.empty_like(a)
    cuda.memcpy_dtoh(a_sigmoid, xout_gpu)
    return a_sigmoid

# print(sigmoidGPU())

# CPU reference: SciPy's expit is the logistic sigmoid.
from scipy.special import expit
y = expit(a)
# print("__________________________________")
# print(y)
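For a simple element-wise op like the sigmoid, PyCUDA's gpuarray/cumath layer can stand in for both the hand-written kernel and the manual memory management. A minimal alternative sketch (it assumes a and expit from the cell above, and is not what the benchmark below times):
In [ ]:
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath

# Copy the input to the device, evaluate 1/(1+exp(-x)) element-wise on the GPU,
# then pull the result back and compare against the SciPy reference.
a_dev = gpuarray.to_gpu(a)
sig_dev = 1.0 / (1.0 + cumath.exp(-a_dev))
assert np.allclose(sig_dev.get(), expit(a), atol=1e-5)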
In [16]:
import matplotlib.pyplot as plt

plt.plot(a, y, label=r'$\sigma(x)$')
plt.text(4, 0.8, r'$\sigma(x)=\frac{1}{1+e^{-x}}$', fontsize=15)
plt.legend(loc='lower right')
plt.show()
In [17]:
import timeit

rounds = 1000  # repetitions for timeit
print('numpy', timeit.timeit(lambda: expit(a), number=rounds))
print('pycuda', timeit.timeit(lambda: sigmoidGPU(), number=rounds))
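Note that sigmoidGPU() pays for a device-to-host copy on every call, so the comparison above mixes kernel time with transfer time. A sketch of kernel-only timing with CUDA events (same func, block, and grid as above; time_till reports milliseconds):
In [ ]:
# Time only the kernel launches, measured on-device with CUDA events.
start, end = cuda.Event(), cuda.Event()
n_blocks = (int(ARR_SIZE) + 1023) // 1024

start.record()
for _ in range(rounds):
    func(a_gpu, xout_gpu, ARR_SIZE, block=(1024, 1, 1), grid=(n_blocks, 1, 1))
end.record()
end.synchronize()  # wait until all launches between the events have finished
print('kernel-only: %.3f ms per call' % (start.time_till(end) / rounds))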